In [ ]:
import numpy as np
import theano
import theano.tensor as T
import lasagne
import os
In [ ]:
start_token = " "
with open("names") as f:
    names = f.read()[:-1].split('\n')

names = [start_token + name for name in names]
In [ ]:
print('n samples = ', len(names))
for x in names[::1000]:
    print(x)
In [ ]:
# all unique characters go here
token_set = <YOUR CODE: a list of all unique characters in names, including space>
tokens = list(token_set)
print('n_tokens = ', len(tokens))
assert 54 < len(tokens) < 56
Theano is built for numbers, not strings of characters. We'll feed our recurrent neural network the ids of characters from our dictionary.
To create such a dictionary, let's assign each character its index in the tokens list.
In [ ]:
token_to_id = <YOUR CODE: dictionary of symbol -> its identifier (index in tokens list)>
id_to_token = <YOUR CODE: dictionary of symbol identifier -> symbol itself>
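For reference, here is a hedged sketch of what these lookups could look like (one possible solution; the `_example` names are illustrative placeholders so they don't clash with your own variables):
In [ ]:
# illustrative sketch only, not the reference solution
token_set_example = set(''.join(names))  # every unique character, space included
tokens_example = list(token_set_example)

token_to_id_example = {token: i for i, token in enumerate(tokens_example)}
id_to_token_example = {i: token for i, token in enumerate(tokens_example)}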
In [ ]:
import matplotlib.pyplot as plt
%matplotlib inline
plt.hist(list(map(len, names)), bins=25)
# truncate names longer than MAX_LEN characters.
MAX_LEN = min([60, max(list(map(len, names)))])
# ADJUST IF YOU ARE UP TO SOMETHING SERIOUS
In [ ]:
names_ix = list(map(lambda name: list(map(token_to_id.get, name)), names))
# crop long names and pad short ones
for i in range(len(names_ix)):
    names_ix[i] = names_ix[i][:MAX_LEN]  # crop too long
    if len(names_ix[i]) < MAX_LEN:
        names_ix[i] += [token_to_id[" "]] * \
            (MAX_LEN - len(names_ix[i]))  # pad too short
assert len(set(map(len, names_ix))) == 1
names_ix = np.array(names_ix)
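A quick optional sanity check on the resulting matrix (purely illustrative):
In [ ]:
# optional check: every row was cropped/padded to exactly MAX_LEN ids
print(names_ix.shape)  # (number of names, MAX_LEN)
assert names_ix.shape[1] == MAX_LEN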
In [ ]:
from agentnet import Recurrence
from lasagne.layers import *
from agentnet.memory import *
from agentnet.resolver import ProbabilisticResolver
In [ ]:
sequence = T.matrix('token sequence', 'int64')
inputs = sequence[:, :-1]
targets = sequence[:, 1:]
l_input_sequence = InputLayer(shape=(None, None), input_var=inputs)
In [ ]:
# One step of rnn
class step:
    # inputs
    inp = InputLayer((None,), name='current character')
    h_prev = InputLayer((None, 10), name='previous rnn state')

    # recurrent part
    emb = EmbeddingLayer(inp, len(tokens), 30, name='emb')
    h_new = <YOUR CODE: concat emb and h_prev and feed them to DenseLayer. Everything must be lasagne layers>
    next_token_probas = <YOUR CODE: compute probabilities for next tokens, should also be a lasagne layer that uses h_new>

    # pick next token from predicted probas
    next_token = ProbabilisticResolver(next_token_probas)
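As a reference point, here is a hedged sketch of one way the two missing layers could be wired up: a plain concat-and-dense recurrent cell with a softmax readout. The `_example` names are illustrative and not part of the assignment.
In [ ]:
# illustrative sketch only: one possible definition of h_new / next_token_probas
emb_and_state = ConcatLayer([step.emb, step.h_prev])
h_new_example = DenseLayer(emb_and_state, num_units=10,
                           nonlinearity=lasagne.nonlinearities.tanh,
                           name='rnn hidden state')
next_token_probas_example = DenseLayer(h_new_example, num_units=len(tokens),
                                       nonlinearity=lasagne.nonlinearities.softmax,
                                       name='next token probabilities')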
In [ ]:
training_loop = Recurrence(
    state_variables={step.h_new: step.h_prev},
    input_sequences={step.inp: l_input_sequence},
    tracked_outputs=[step.next_token_probas, ],
    unroll_scan=False,
)
In [ ]:
# Model weights
weights = lasagne.layers.get_all_params(training_loop, trainable=True)
print(weights)
In [ ]:
predicted_probabilities = lasagne.layers.get_output(
    training_loop[step.next_token_probas])
# If you use dropout do not forget to create deterministic version for evaluation
In [ ]:
loss = <YOUR CODE: loss function - a simple categorical crossentropy will do, maybe add some regularizer>
updates = lasagne.updates.adam(loss, weights)
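For reference, a hedged sketch of a plain categorical crossentropy averaged over all positions (illustrative `_example` name; you may add regularization or masking on top):
In [ ]:
# illustrative sketch only: mean crossentropy between predicted probabilities
# of shape (batch, time, n_tokens) and integer targets of shape (batch, time)
probas_flat = predicted_probabilities.reshape((-1, len(tokens)))
targets_flat = targets.reshape((-1,))
loss_example = lasagne.objectives.categorical_crossentropy(probas_flat,
                                                           targets_flat).mean()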
In [ ]:
# training
train_step = theano.function([sequence], loss,
                             updates=training_loop.get_automatic_updates() + updates)
In [ ]:
n_steps = T.scalar(dtype='int32')
x0 = InputLayer([None], theano.shared(np.int32([token_to_id[' ']])))
feedback_loop = Recurrence(
    state_variables={step.h_new: step.h_prev,
                     step.next_token: step.inp},
    tracked_outputs=[step.next_token_probas, ],
    state_init={step.next_token: x0},
    batch_size=theano.shared(1),
    n_steps=n_steps,
    unroll_scan=False,
)
In [ ]:
generated_tokens = get_output(feedback_loop[step.next_token])
In [ ]:
generate_sample = theano.function(
    [n_steps], generated_tokens, updates=feedback_loop.get_automatic_updates())
In [ ]:
def generate_string(length=MAX_LEN):
    output_indices = generate_sample(length)[0]
    return ''.join(tokens[i] for i in output_indices)
In [ ]:
generate_string()
In [ ]:
def sample_batch(data, batch_size):
    rows = data[np.random.randint(0, len(data), size=batch_size)]
    return rows
In [ ]:
print("Training ...")
# total N iterations
n_epochs = 100
# how many minibatches are there in the epoch
batches_per_epoch = 500
# how many training sequences are processed in a single function call
batch_size = 10
for epoch in range(n_epochs):
    avg_cost = 0
    for _ in range(batches_per_epoch):
        avg_cost += train_step(sample_batch(names_ix, batch_size))

    print("\n\nEpoch {} average loss = {}".format(
        epoch, avg_cost / batches_per_epoch))

    print("Generated names")
    for i in range(10):
        print(generate_string())
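Once the loss settles, you can optionally sample longer strings by passing a different length (illustrative usage of the functions defined above):
In [ ]:
# optional: sample a few longer strings after training (illustrative)
for _ in range(5):
    print(generate_string(length=2 * MAX_LEN))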